
* ---- Session setup: empty memory, no output paging, fresh log ----
clear all
set more off
capture log close

log using "$log/06_02_create_u_rate_SIAB_all_workers.log", replace

* Timer 1 is started here for the run of this do-file
timer clear 1
timer on 1
*label lang en 

* Fix the random-number seed before the data are loaded
set seed 666

* Load the raw spell-level file
use "$orig\SIAB_7519_v1.dta", clear

* ---------------------------------------------------------------------------
* Clean the imputed education variable and derive the person-level flag
* trained, which equals 1 if the person is ever observed with ausbildung_imp
* equal to 2 or 4.
* NOTE(review): the meaning of the individual education codes is not visible
* in this file - confirm against the SIAB codebook.
* ---------------------------------------------------------------------------

* Recode: -7 becomes missing, category 6 is merged into category 5
tab ausbildung_imp, mis
replace ausbildung_imp = . if ausbildung_imp == -7
replace ausbildung_imp = 5 if ausbildung_imp ==  6

* Inspect the raw education and schooling codes where the imputed value is missing
tab ausbildung_imp, mis
tab ausbildung if missing(ausbildung_imp), mis
tab schule if missing(ausbildung_imp), mis

* Fill missing imputed values from the raw ausbildung / schule combination
replace ausbildung_imp = 1 if missing(ausbildung_imp) &  inlist(ausbildung,1,7,8)   & inlist(schule,1,4,6)
replace ausbildung_imp = 2 if missing(ausbildung_imp) &  inlist(ausbildung,2,3,4,9,25) & inlist(schule,1,4,6) 
replace ausbildung_imp = 3 if missing(ausbildung_imp) &  inlist(ausbildung,1,7,8)   & inlist(schule,7,8,9) 
replace ausbildung_imp = 4 if missing(ausbildung_imp) &  inlist(ausbildung,2,3,4,9,25) & inlist(schule,7,8,9) 
replace ausbildung_imp = 5 if missing(ausbildung_imp) & (inrange(ausbildung,10,24)  | inrange(ausbildung,5,6))
count if missing(ausbildung_imp)
tab ausbildung_imp, mis

sort persnr spell
* Carry the previous spell's value forward into still-missing spells of the same person
replace ausbildung_imp = ausbildung_imp[_n-1] if missing(ausbildung_imp) & persnr == persnr[_n-1]
* Enforce a non-decreasing education path within person.
* The predecessor must be non-missing: Stata treats missing as larger than any
* number, so without this guard a missing first spell would overwrite every
* later valid value of that person with missing.
replace ausbildung_imp = ausbildung_imp[_n-1] if ausbildung_imp < ausbildung_imp[_n-1] & !missing(ausbildung_imp[_n-1]) & persnr == persnr[_n-1]

count if missing(ausbildung_imp)
tab ausbildung_imp, mis


* Person-level indicator: ever observed with education code 2 or 4
gen byte x = (inlist(ausbildung_imp,2,4))
bysort persnr: egen byte trained = max(x)
drop x 
tab trained, mis

tab ausbildung_imp, mis
drop ausbildung_imp

* Calendar year in which the episode ends
gen year = year(endepi)
* Episode length in days, counting both the first and the last day
gen spell_length = endepi-begepi+1

* Age at the start of the episode; keep ages 15 to 66 only
* (a missing age is dropped as well, exactly as with the two separate drops)
gen byte age = year(begepi)-gebjahr
drop gebjahr
keep if inrange(age, 15, 66)

* Youth indicator: below 30 at the start of the episode
gen byte youth = age < 30
drop age

* Two-digit occupation code (KlDB system): drop the last digit of beruf;
* negative beruf codes yield a missing occupation
gen occ_2dig = floor(beruf/10) if beruf >= 0
tab occ_2dig, mis
drop beruf

* Attach industry (w08_3_gen) and state (ao_bula) from the establishment file,
* matched on establishment id and the year in which the episode ends
rename year jahr
merge m:1 betnr jahr using "$orig\SIAB_7519_v1_bhp_basis_v2.dta", keepusing(w08_3_gen ao_bula) 
keep if inlist(_merge, 1, 3)
drop _merge jahr
compress

* Two-digit industry code: drop the last digit of w08_3_gen
gen ind_2dig = floor(w08_3_gen/10)
drop w08_3_gen

* Within person, fill missing occupation, industry and state from the previous spell
sort persnr spell
foreach v of varlist occ_2dig ind_2dig ao_bula {
	replace `v' = `v'[_n-1] if missing(`v') & !missing(`v'[_n-1]) & persnr==persnr[_n-1]
}

drop spell

* Discard spells for which any of the three cell identifiers is still missing
drop if missing(occ_2dig, ind_2dig, ao_bula)

compress

*Drop if out of labor force
* Keeps only the listed erwstat codes. The groups marked 3, 4 and 6 are kept
* only when the original spell starts in 1999 or later.
* Fix: the date restriction was written as year(begorig >= 1999), which takes
* year() of a 0/1 expression, returns 1960 and is therefore always true.
* It now reads year(begorig) >= 1999.
tab erwstat
* Code 12 with an original end date in 2005 or later is set to missing and is
* therefore removed by the keep below
replace erwstat = . if erwstat == 12 & year(endorig)>=2005
keep if ///
( erwstat == 101 | erwstat == 140 | erwstat== 143) | /// gr == 1: employees subject to social security
((erwstat == 109 | erwstat == 209) & year(begorig) >= 1999) | /// gr = 3: marginal part-time employees
((erwstat == 103 | erwstat == 142) & year(begorig) >= 1999) | /// gr = 4: partial retirement
((erwstat == 205 | erwstat == 118) & year(begorig) >= 1999) | /// gr = 6: casual workers
( erwstat == 104 | erwstat == 112 | erwstat == 119 | erwstat == 120 | erwstat == 123 | erwstat == 124 | erwstat == 149 |     erwstat == 201 | erwstat == 203 | erwstat == 599) | /// gr = 7, merged with 6: other
erwstat == 1 | /// gr==11
erwstat == 2 | /// gr==12
erwstat == 31 | erwstat == 41 | erwstat == 51 | ///  gr==21
erwstat == 102 | erwstat== 141 // keep people in training, gr == 2
tab erwstat, mis

*Unemployment indicator
* All remaining codes below 100 are counted as unemployment episodes
gen U_epi = (erwstat<100)
tab U_epi, mis

drop erwstat

* Keep episodes that begin between 1990 and 2019
keep if inrange(year(begepi), 1990, 2019)

* Reduce to the variables needed for the monthly status assignment
keep persnr begepi endepi U_epi ao_bula occ_2dig ind_2dig trained youth spell_length
compress

save "$temp/test_09_SIAB_all_workers", replace

******************************
* ASSIGN UNEMPLOYMENT STATUS *
******************************

*Start and end (first and last calendar year of the monthly panel)
global start = 1990
global end = 2019

* Labour-market status is measured on this day of every month
local day = 15
local months "jan feb mar apr may jun jul aug sep oct nov dec"

* For every month: keep one spell per person covering the reference date,
* then collapse to cells (state x occupation x industry x trained x youth)
* holding the unemployment share U and the person count obs.
* Each month is stored in its own tempfile.
forval year = $start (1) $end {
	* Load once per year and keep only spells overlapping that calendar year
	use "$temp/test_09_SIAB_all_workers", clear
	display "`year'"
	keep if year(begepi)<=`year' & year(endepi)>=`year'

	foreach month in `months' {
		display "`month' `year'"
		qui {
		preserve
			local refdate = date("`day'`month'`year'","DMY")
			keep if begepi<=`refdate' & endepi>=`refdate'		/* Keep if spell covers reference date */
			bysort persnr: egen U = min(U_epi)				  	/* If employed anywhere, count as E */
			keep if U==U_epi 									/* Keep spells that match status */
			bysort persnr: egen max_spell = max(spell_length)	/* Find max spell w/i status */
			keep if spell_length==max_spell						/* Keep longest spell to assign state */
			* Deterministic tie-break among equally long spells. Previously
			* duplicates drop with force kept whichever tied row came first
			* in an arbitrary sort order, so cell assignment could differ
			* between runs. Rows still tied after this sort are identical in
			* every variable used by the collapse.
			bysort persnr (begepi endepi ao_bula occ_2dig ind_2dig youth): keep if _n==1
			gen obs = 1
			collapse U (sum) obs, by(ao_bula occ_2dig ind_2dig trained youth)
			gen day = `refdate'
			format day %td
			gen month = mofd(day)
			format month %tm
			tempfile `month'`year'
			save ``month'`year'', replace
		restore
		}
	}
}

*Append months
clear
forval year = $start (1) $end {
	foreach month in `months' {
		append using ``month'`year''
	}
}

* Calendar year of each monthly cell; the daily reference date is then dropped
gen year_entry = year(day)
drop day

* Number of unemployed per cell: cell share U times person count obs
gen U_w = U*obs

rename ao_bula region_entry

* Restrict the stacked monthly cells to 1997-2019
keep if inrange(year_entry,1997,2019)

save "$temp/U_internal_source_all_workers.dta", replace


*Checks
* Aggregate to region x year x youth, compute the internal unemployment rate
* in percent and compare it with the series in U_regional.dta
collapse (sum) U_w obs, by(year_entry youth region_entry)
gen U_internal = 100*U_w/obs
merge m:1 region_entry year_entry using "$data/U_regional.dta"
rename (region_entry year_entry) (state year)

* Benchmark series used for each youth group
local bench1 Uy_oecd
local bench0 U_oecd

* Raw correlations
foreach y in 1 0 {
	corr U_internal `bench`y'' if youth==`y'
}

* Internal rate on benchmark, state fixed effects, errors clustered by state
foreach y in 1 0 {
	reg U_internal `bench`y'' i.state if youth==`y', vce(cluster state)
}

* Reverse regression: benchmark on internal rate
foreach y in 1 0 {
	reg `bench`y'' U_internal i.state if youth==`y', vce(cluster state)
}

* Minimum number of person-month observations a region-year cell must have
local cutoff = 20

*Overall
* Build the region x year unemployment rate over all workers.
* Paths use forward slashes, matching the save of the source file above;
* forward slashes work on every platform, backslashes only on Windows.
use "$temp/U_internal_source_all_workers.dta", clear
collapse (sum) U_w obs, by(year_entry region_entry)
gen U_internal = 100*U_w/obs
xtset region_entry year_entry
* Moving average over the current and the following year (no lags, one lead)
tssmooth ma U_internal01 = U_internal, window(0 1 1)
keep year_entry region_entry obs U_internal*
* Thin cells are dropped only after smoothing, so they still enter the average
drop if obs<`cutoff'
save "$temp/U_internal_all_workers.dta", replace

* Stop and report timer 1, which is started at the top of this do-file
timer off 1
timer list 1

clear
cap log close
